# code for installing libraries
import subprocess
import sys
import importlib

# Import names to check. Stdlib modules (datetime, json) always import
# successfully, so they never trigger an install. pandas is added because it
# is imported later in this notebook (as pd) but was missing from the list.
required_packages = ["requests", "skyfield", "datetime", "numpy", "pandas",
                     "geopandas", "shapely", "json", "matplotlib", "pyproj",
                     "contextily", "mpl_toolkits", "mapclassify", "folium"]
# Import name -> pip distribution name, where the two differ.
# mpl_toolkits has no PyPI package of its own: it ships with matplotlib.
pip_name_overrides = {"mpl_toolkits": "matplotlib"}

# defining Function to install packages
def install_package(package):
    """Install *package* with pip, using the interpreter running this script."""
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])

# Checking and installing missing packages
for package in required_packages:
    try:
        # Tries to import the package
        importlib.import_module(package)
    except ImportError:
        # If the package is not importable, install its pip distribution
        print(f"{package} not found, installing...")
        install_package(pip_name_overrides.get(package, package))
# below message is differently coded output
Assignment 1 - ENVS456
Installing Libraries
All required packages are installed.
Introduction
Analysis of Spatial Datasets through APIs
In the rapidly evolving field of Geoinformatics, the ability to access, analyze, and draw inferences from spatial datasets stands as a cornerstone of research and development in addressing complex scientific questions. As scientists around the world try to gain a deeper understanding of many complex geographical phenomena, the dependence on accurate, up-to-date and real-time spatial data has never been more critical. This assignment uses the richness of APIs (Application Programming Interfaces) to access different spatial datasets, offering an opportunity to explore satellite/object patterns over geographic areas.
APIs work as gatekeepers for programmatically retrieving data from remote servers, enabling researchers to tap into a vast amount of spatial data resources maintained by governments, non-profits, and private organizations. There are many types of APIs, but we are going to use a RESTful API, which is a type of API that relies on the HTTP protocol to retrieve information from remote databases. Not all APIs which use the HTTP protocol are RESTful.
RESTful APIs are APIs which adhere to the principles and constraints of REST, which stands for Representational State Transfer: starting from the Null style, these are client-server architecture, statelessness, cacheability, uniform interface, layered system and Code-On-Demand (optional) (Fielding, 2000). The terms ‘REST APIs’ and ‘RESTful APIs’ are often used interchangeably in the context of web services; saying an API is RESTful typically implies strict compliance with REST principles, i.e. the REST architectural constraints. This assignment leverages such API technology to retrieve its spatial datasets.
Finally, This assignment aims to analyze active satellites overhead for each country, same methodology can be implemented to track space objects overhead. It also outlines methodology used in accessing and utilizing various spatial datasets for analysis through APIs.
Datasets
Opendatasoft’s Explore API was used to get the geographical boundaries of all countries in GeoJSON format. It has a limit of 100 country boundaries per API call. This dataset is called World Administrative Boundaries - Countries and Territories in Opendatasoft’s catalog (). Then, active satellite information is collected using an API developed by CelesTrak (CelesTrak, 2024), which is also the data provider for the TLE API — one of the NASA APIs that provides two-line element set data for Earth-orbiting objects in JSON format (NASA Open APIs, 2024).
# Importing Required Packages with aliases and functions
import requests
from skyfield.api import load, wgs84, EarthSatellite
from datetime import datetime, timedelta
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, shape, Polygon, MultiPolygon
import json
from shapely.ops import transform
import pyproj
import matplotlib.pyplot as plt
import contextily as ctx
import folium
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
from mapclassify import Quantiles
import matplotlib.font_manager as fm
TLE, or Two-Line Element Set
A Two-Line Element Set (TLE) is a data format used to store the orbital parameters of Earth-orbiting objects, commonly used in the satellite-tracking and astronomy disciplines. Developed by NORAD and NASA, a TLE consists of two lines of alphanumerical data that represent a satellite’s orbital parameters at a specific point in time, enabling the prediction of its future positions. Each line contains crucial information such as the satellite’s inclination, right ascension of the ascending node, eccentricity, argument of perigee, mean anomaly, mean motion, and the epoch of the element set (Space-Track.org, 2024). This format is instrumental for satellite tracking software to calculate satellite positions and velocities in Earth orbit. TLE data is widely used for various applications, including satellite communication, navigation, space debris tracking, and amateur satellite observation, making it a fundamental resource for understanding and monitoring man-made objects in Earth orbit.
Below we are going to get TLE data for all active satellites orbiting the Earth, using an API developed by CelesTrak (CelesTrak, 2024). The resulting response will be stored in plain text format. Then we create a dataframe with satellite name, Line 1 and Line 2 as columns.
# Using CelesTrak to get TLE data for active satellites
try:
    Sat_TLE = requests.get(
        "https://celestrak.org/NORAD/elements/gp.php?GROUP=active&FORMAT=tle",
        timeout=30,  # avoid hanging forever on a stalled connection
    )
    # Turn HTTP error statuses (4xx/5xx) into exceptions: the original code
    # silently fell through and tried to parse an error page.
    Sat_TLE.raise_for_status()
    print("Data Retrieved successfully.")
except requests.exceptions.RequestException as e:
    # This will catch HTTP errors, connection errors, etc
    print(f"An error occurred: {e}")
    # Without the response the rest of the notebook cannot proceed
    # (Sat_TLE would be undefined below), so re-raise.
    raise
# first we split after every line, then we strip '\r' at end of every line
tle_lines = [line.strip('\r') for line in Sat_TLE.text.strip().split('\n')]
# A well-formed TLE feed has exactly 3 lines (name, line1, line2) per satellite
if len(tle_lines) % 3 != 0:
    print("Warning: The TLE data might be incomplete or corrupted.")
satellites = [tle_lines[i:i+3] for i in range(0, len(tle_lines), 3)]
satellites_data = []
for sat in satellites:
# Each item in satellites list is a list with three elements: name, line1, line2
satellite_info = {
'name': sat[0],
'line1': sat[1],
'line2': sat[2]
}
satellites_data.append(satellite_info)
satellites_df = pd.DataFrame(satellites_data);
satellites_df.head()| name | line1 | line2 | |
|---|---|---|---|
| 0 | CALSPHERE 1 | 1 00900U 64063C 24066.46276684 .00000999 0... | 2 00900 90.1969 53.1933 0024031 293.5009 184... |
| 1 | CALSPHERE 2 | 1 00902U 64063E 24067.03319064 .00000061 0... | 2 00902 90.2105 56.7404 0020203 120.1107 309... |
| 2 | LCS 1 | 1 01361U 65034C 24066.20400602 .00000011 0... | 2 01361 32.1460 354.8151 0004780 7.1482 352... |
| 3 | TEMPSAT 1 | 1 01512U 65065E 24066.43969899 .00000056 0... | 2 01512 89.9487 214.5191 0071439 67.8035 357... |
| 4 | CALSPHERE 4A | 1 01520U 65065H 24066.59735979 .00000157 0... | 2 01520 89.9531 128.5739 0066944 292.8547 195... |
Satellite Coordinates using Skyfield Library
The Skyfield library provides precise astronomical calculations, offering an interface to compute positions of planets, stars, and satellites within the solar system and beyond. Leveraging state-of-the-art algorithms and astronomical theories, Skyfield translates complex celestial mechanics into straightforward Python code, enabling the calculation of celestial events, satellite orbits (using TLE data), and astrophotography planning with high accuracy. This library is ideal for our case, as we need to calculate satellite coordinates at a given point in time; it is then possible to estimate the satellites overhead per country at that instant.
# Skyfield timescale object, used to build time instants (e.g. UTC)
ts = load.timescale()
# Reference datetime: midnight UTC on 04 March 2024
start_datetime = datetime(2024, 3, 4, 0, 0, 0)
# Column headers: one ISO-8601 timestamp every 240 minutes (4 hours), 6 in total
times = []
for step in range(6):
    stamp = start_datetime + timedelta(minutes=240 * step)
    times.append(stamp.strftime('%Y-%m-%dT%H:%M:%SZ'))
# Defining Function to calculate satellite positions
def calculate_positions(name, line1, line2, year=2024, month=3, day=4,
                        interval_minutes=240):
    """Return the sub-satellite points of one satellite over a single day.

    Positions are sampled every ``interval_minutes`` minutes (default: every
    4 hours) starting at midnight UTC of the given date. The defaults match
    the 2024-03-04 analysis window used throughout this notebook, so existing
    callers are unaffected.

    Parameters
    ----------
    name, line1, line2 : str
        Satellite name and its two TLE lines.
    year, month, day : int
        UTC date of the 24-hour sampling window.
    interval_minutes : int
        Sampling interval in minutes.

    Returns
    -------
    list of str
        Positions formatted as "lon,lat" strings — longitude first, to be
        consistent with Shapely's (x, y) convention.
    """
    # Creating satellite object from TLE lines using Skyfield Library
    # (relies on the module-level `ts` timescale defined above)
    satellite = EarthSatellite(line1, line2, name, ts)
    # Sample times across 24 hours (1440 minutes) at the requested interval
    sample_times = ts.utc(year, month, day, 0, range(0, 1440, interval_minutes))
    # Geocentric position at each sample, projected onto the WGS84 ellipsoid
    geocentric = satellite.at(sample_times)
    subpoint = wgs84.subpoint(geocentric)
    lons, lats = subpoint.longitude.degrees, subpoint.latitude.degrees
    return [f"{lon},{lat}" for lon, lat in zip(lons, lats)]
positions_dict = {} # defining empty dict to store satellite coords at diff time
for index, row in satellites_df.iterrows():
positions = calculate_positions(row['name'], row['line1'], row['line2'])
positions_dict[row['name']] = positions# Converting above dictionary to a DataFrame
positions_df = pd.DataFrame.from_dict(positions_dict, orient='index', columns=times) # times variable is used for column names
# Resetting the index to move the satellite names into a regular column
positions_df.reset_index(inplace=True)
# Renaming the new column (previously the index) to 'Satellite'
positions_df.rename(columns={'index': 'Satellite'}, inplace=True)To have computational efficiency, We used dictionary type object in intermediate process of function (calculate_positions). Latitude and Longitude are stored as string dataframe(postions_df). We will convert every coords which are as strings to Point Geometry using Shapely Library.
Note: In Python, we use (longitude, latitude). The code below splits each string value at the comma and converts the parts to float-type data before passing them into the Point function of the Shapely library.
for timestamp in times:
positions_df[timestamp] = positions_df[timestamp].apply(lambda x: Point([float(coord) for coord in x.split(',')]))positions_df.head()| Satellite | 2024-03-04T00:00:00Z | 2024-03-04T04:00:00Z | 2024-03-04T08:00:00Z | 2024-03-04T12:00:00Z | 2024-03-04T16:00:00Z | 2024-03-04T20:00:00Z | |
|---|---|---|---|---|---|---|---|
| 0 | CALSPHERE 1 | POINT (70.90660296193354 3.7158649204721845) | POINT (-168.2230109900412 -79.3722281604938) | POINT (130.47883777703515 25.49233304337693) | POINT (-109.35040111178193 50.56658383501217) | POINT (-170.0184818555208 -53.94570821806006) | POINT (-49.83228886040186 -21.69938786339812) |
| 1 | CALSPHERE 2 | POINT (-106.04522979299136 66.0981297857472) | POINT (14.353289669368113 22.48952566357138) | POINT (-46.43833895820065 -68.951960658235) | POINT (74.02369011926766 -20.55807177289187) | POINT (13.18149034330885 70.84603085910078) | POINT (133.69052095595006 17.695467848233477) |
| 2 | LCS 1 | POINT (-103.73793219985143 28.275895642548118) | POINT (77.86859020381276 -28.504394266699823) | POINT (-111.00718163893485 4.824675764611408) | POINT (56.45280892389135 22.21917838120799) | POINT (-123.61622597210805 -31.793924004348426) | POINT (52.124324310310705 13.783768480659568) |
| 3 | TEMPSAT 1 | POINT (-127.6281109549757 -60.15136124263363) | POINT (-7.927512238827062 -41.61642564191508) | POINT (-68.00651525619188 38.61770438385358) | POINT (51.6960768856886 60.85531663368477) | POINT (-8.359777281330288 -19.156520843701077) | POINT (111.07009588726672 -82.49206121312446) |
| 4 | CALSPHERE 4A | POINT (146.4287344452664 -62.92172296394485) | POINT (-93.86073668305397 -35.400617125788244) | POINT (-153.9434423479504 46.50453062900567) | POINT (-34.22041900967933 53.366512005072416) | POINT (-94.29846233060641 -27.457520890430107) | POINT (25.375801967620593 -71.12113770784784) |
Rationale for CRS
As this assignment covers satellites overhead per country, which involves calculating the coordinates of satellites, i.e. latitude and longitude, we are going to use EPSG:4326 (WGS84) as the CRS for our spatial analyses, also for its compatibility with global positioning systems (GPS) and web mapping services. WGS84 is the coordinate reference system used by GPS satellites (GISGeography, 2023).
World Administrative Boundaries
OpenDataSoft is a comprehensive platform designed to ease the process of publishing, sharing, and utilizing data by organizations and governments. It gives users access to a wide range of datasets across various domains, enhancing innovation and collaboration. This API, provided by OpenDataSoft, offers access to detailed geographic boundaries of countries and administrative regions around the globe ().
base_url = 'https://public.opendatasoft.com/api/explore/v2.1/catalog/datasets/world-administrative-boundaries/records'
total_count = 256 # Total no.of countries exists, according to opendatasoft server
limit = 100 # Number of results per API call, According to opendatasoft site
offset = 0 # starting at the beginning
all_countries = [] # empty list for adding info
while offset < total_count:
response = requests.get(base_url, params={'limit': limit, 'offset': offset})
if response.status_code == 200:
data = response.json() # converting into JSON
all_countries.extend(data['results']) # Extracting 'results' item in list
offset += limit # For API call to get next set of results
# We dont need to trim all_countries because API has correctly 256 countries, no additional polygons
else:
print(f"Failed to fetch data: HTTP {response.status_code}")
break # will exit the loop in case of an error
# Now, `all_countries` contains data for up to 256 countries, fetched in chunks of 100The GeoJSON retrieved from the OpenDataSoft API for world administrative boundaries deviates from the standard naming in GeoJSON structure, which needed an additional cleaning process to ensure compatibility. To address these issues, like features are stored as results in GeoJSON, So I modified response by reformatting the GeoJSON to align with the standards as below.
# Assuming `data` is your JSON object
results = all_countries
# Preparing lists to hold data from 'results'
geometries = []
properties = []
for item in results:
if 'geo_shape' in item and 'geometry' in item['geo_shape'] and 'name' in item:
geom = shape(item['geo_shape']['geometry'])
geometries.append(geom)
properties.append(item['name'])
else:
geometries.append(None)
properties.append(None)
countries_df = pd.DataFrame({'Countries' : properties})
# Converting the DataFrame to a GeoDataFrame
countries_gdf = gpd.GeoDataFrame(countries_df, geometry=geometries);
countries_gdf.set_crs(epsg=4326, inplace=True) # setting crs to EPSG: 4326
countries_gdf.head()| Countries | geometry | |
|---|---|---|
| 0 | Turks and Caicos Islands | MULTIPOLYGON (((-71.12757 21.44263, -71.14605 ... |
| 1 | Bolivia | POLYGON ((-58.15889 -20.16806, -58.13722 -20.1... |
| 2 | Cuba | MULTIPOLYGON (((-82.54460 21.57389, -82.59862 ... |
| 3 | China | MULTIPOLYGON (((110.71583 20.06888, 110.77859 ... |
| 4 | Iraq | POLYGON ((44.78734 37.14971, 44.76617 37.11228... |
As we can see from the ‘geometry’ column, some of the geometries are Polygons; we need to ensure consistency for mapping and visualizations, so we convert Polygons into MultiPolygons. The conversion function is defined in a hidden cell.
# Applying the conversion function (hided in above cell ) to the 'geometry' column of your GeoDataFrame
countries_gdf['geometry'] = countries_gdf['geometry'].apply(ensure_multipolygon);Map Projection Problem
The issue of map projections where the geography of a country extends across the International Date Line presents a challenge in plotting latitude and longitude on 2D maps, particularly for areas like eastern Russia near the 180-degree meridian. This problem arises because representing the Earth’s surface on a two-dimensional plane requires distortions, and the International Date Line acts as a discontinuity for spatial datasets. As a result, geographical features that span this meridian may appear fragmented or inaccurately placed on either side of the map. The code below rectifies this issue and adds the geometry back to the GeoDataFrame (countries_gdf).
# Before Shift function, and it is hided in above chunk
russia_gdf_bs= countries_gdf[countries_gdf['Countries'] == 'Russian Federation']
# Applying the shift function to the Russian geometries.
russia_gdf = countries_gdf[countries_gdf['Countries'] == 'Russian Federation'].copy()
russia_gdf['geometry'] = russia_gdf['geometry'].apply(lambda geom: shift_geom(geom, 180))fig, axs = plt.subplots(1, 2, figsize=(15, 14)) # 1 row, 2 columns
# first GeoDataFrame
russia_gdf_bs.plot(ax=axs[0], color='blue') # Plot on the first subplot
axs[0].set_title('Geography of Russia on 2D Map - Before using Shift Function')
# Increasing the y-limit of the left plot
ymin, ymax = axs[0].get_ylim()
axs[0].set_ylim(ymin*0.6, ymax * 1.4)
# second GeoDataFrame
russia_gdf.plot(ax=axs[1], color='green') # Plot on the second subplot
axs[1].set_title('Geography of Russia on 2D Map - After using Shift Function')
plt.tight_layout()
plt.show()Estimation of Satellites Overhead
In this assignment, I am interested in analyzing the spatial distribution of satellite positions over time and their intersection with the various countries around the globe. The core of the process below involves iterating over a series of timestamps, each representing a distinct snapshot of satellite positions. For each timestamp, we extracted the corresponding satellite position data and transformed it into a GeoDataFrame. This transformation allowed us to perform a spatial join with a pre-existing GeoDataFrame of country boundaries, effectively determining which satellites were located within the boundaries of each country at any given moment.
Then, we aggregated these results to compute the count of satellites per country for each timestamp, generating a Series object that mapped each country to its respective satellite count. These counts were then combined into a new DataFrame, where columns represented individual timestamps and rows corresponded to different countries. This DataFrame can serve as a temporal satellite count matrix, capturing the dynamics of satellite distributions across countries over time.
To facilitate visualization of static and interactive maps, we merged this matrix with our original GeoDataFrame of country boundaries. This merge operation creates a unified dataset that has spatial resolution of countries and temporal resolution of 4 hours. Due to constraints of assignment, We calculated satellites distribution with temporal resolution of 4 hours. This resultant spatial dataset can be used to find insights into various issues regarding global digital repression, mass surveillance etc.
# temporal satellite count matrix estimtion
# empty dict var
counts_dict = {}
for timestamp in positions_df.columns[1:]: # Skipping the 'name' column
# creating copy of first(Satellite), second col of postions_df
temp_df = positions_df[['Satellite', timestamp]].copy()
temp_df.rename(columns={timestamp: 'position'}, inplace=True)
# Convert the DataFrame to a GeoDataFrame
temp_gdf = gpd.GeoDataFrame(temp_df, geometry='position')
temp_gdf.set_crs(epsg=4326, inplace=True) # Assuming positions are in lat/lon
countries_gdf_copy = countries_gdf_copy.dropna(subset=['Countries'])
# Performing spatial join with countries_gdf to find positions within each country
joined_gdf = gpd.sjoin(temp_gdf, countries_gdf_copy, how="inner", predicate='intersects')
counts_series = joined_gdf.groupby('Countries').size() # Placeholder for your count computation logic
counts_series = counts_series.reindex(countries_gdf_copy['Countries'].unique(), fill_value=0)
counts_dict[timestamp] = counts_series
# Converting the dictionary of Series to a DataFrame
counts_df = pd.DataFrame(counts_dict)
# Ensuring the index of counts_df matches countries_gdf (e.g., country names)
# Reset the index of counts_df
counts_df_reset = counts_df.reset_index()
counts_df_reset.rename(columns={'index': 'Countries'}, inplace=True)
# Merge counts_df_reset with countries_gdf
temporal_satellite_cm = pd.merge(countries_gdf_copy, counts_df_reset, how='left', on='Countries')
temporal_satellite_cm.head()| Countries | geometry | 2024-03-04T00:00:00Z | 2024-03-04T04:00:00Z | 2024-03-04T08:00:00Z | 2024-03-04T12:00:00Z | 2024-03-04T16:00:00Z | 2024-03-04T20:00:00Z | |
|---|---|---|---|---|---|---|---|---|
| 0 | Turks and Caicos Islands | MULTIPOLYGON (((-71.12757 21.44263, -71.14605 ... | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | Bolivia | MULTIPOLYGON (((-58.15889 -20.16806, -58.13722... | 20 | 19 | 10 | 12 | 24 | 15 |
| 2 | Cuba | MULTIPOLYGON (((-82.54460 21.57389, -82.59862 ... | 3 | 1 | 0 | 3 | 5 | 2 |
| 3 | China | MULTIPOLYGON (((110.71583 20.06888, 110.77859 ... | 190 | 222 | 195 | 180 | 214 | 202 |
| 4 | Iraq | MULTIPOLYGON (((44.78734 37.14971, 44.76617 37... | 9 | 9 | 5 | 9 | 6 | 8 |
Data Classifications
# plot_classification function hided above
plot_classifications(temporal_satellite_cm, '2024-03-04T00:00:00Z')After observing distribution of satellites distribution at 2024-03-04T00:00:00Z , To distinguish High satellites overhead from low, We can plot either Natural Breaks or fisher jenks classification. Therefore, We are going to plot Natural Breaks classification map along with normal map.
fig, axs = plt.subplots(2, 1, figsize=(20, 15)) # 1 row, 2 columns for side by side plots
# Plot 1: Simple color map
temporal_satellite_cm.plot(column='2024-03-04T00:00:00Z', ax=axs[0], legend=True,
cmap='Greens', edgecolor='black', lw=0.6)
axs[0].set_title(f"No. of Satellites Overhead per country - {times[0]}")
# Increasing the y-limit of the left plot
ymin, ymax = axs[0].get_ylim()
axs[0].set_ylim(ymin, ymax * 1.3)
# Increasing the x-limit of the left plot
xmin, xmax = axs[0].get_xlim()
axs[0].set_xlim(xmin*0.975, xmax * 1)
add_scalebar(axs[0], length=5556, location=(0.01, 0.05), units='km')
# Plot 2: Natural breaks classification
temporal_satellite_cm.plot(column='2024-03-04T00:00:00Z', ax=axs[1], legend=True,
cmap='YlGnBu', scheme='naturalbreaks', k=5,
edgecolor='black', lw=0.6, legend_kwds={'loc': 'center right'})
axs[1].set_title(f"No. of Satellites Overhead per country (Natural Breaks) - {times[0]}")
add_scalebar(axs[1], length=5556, location=(0.01, 0.05), units='km')
plt.tight_layout()
plt.show()- Distance calculated from equator (0 N, 0 E) to (0 N, 50 E) - https://www.nhc.noaa.gov/gccalc.shtml
Interactive Map
# below map uses a Mapbox basemap; mapbox_access_token is defined in a hidden cell
mbox_url = f'https://api.mapbox.com/styles/v1/mapbox/light-v9/tiles/{{z}}/{{x}}/{{y}}?access_token={mapbox_access_token}'
i_map = folium.Map(location=[35,0], zoom_start=2, tiles = mbox_url, attr='Mapbox')
# below code is for choropleth map to show spatial variations
# NOTE(review): the 'id_str' column (and the 'Time-averaged' field used in the
# popup below) are presumably added to temporal_satellite_cm in a hidden cell —
# confirm they exist, otherwise the Choropleth/Popup will fail.
satellites_map = folium.Choropleth(geo_data=temporal_satellite_cm, data=temporal_satellite_cm, columns=("id_str", "2024-03-04T00:00:00Z"), key_on="feature.id",fill_color="Greens", line_weight=0.6, line_color='black', fill_opacity=0.7,
legend_name=f"No. of Satellites Overhead per Country, {times[0]}",
highlight = True)
satellites_map.add_to(i_map)
# custom_style and value_show (popup/highlight helpers) are defined in a hidden cell
folium.GeoJson(temporal_satellite_cm, style_function = custom_style, highlight_function=value_show, tooltip= folium.features.GeoJsonTooltip(fields=['Countries'], aliases=("Country Name:",)),
popup=folium.features.GeoJsonPopup(fields=['Countries','2024-03-04T00:00:00Z', 'Time-averaged'], aliases = ['Country Name: ', 'No. of Satellites at 2024-03-04 T00:00:00 : ', 'No. of Satellites averaged over 24 hrs: '])
).add_to(i_map)
# display the interactive map (notebook cell output)
i_map